source("Downstream.v2.R")
# loading libraries
library(Seurat)
library(rrrSingleCellUtils)
library(ggplot2)
library(msigdbr)
library(dplyr)
Load Seurat objects here so only have to complete once (commented out elsewhere).
# Create Seurat objects and perform initial QC. Label original source. osteoblasts
osteoblasts <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0031/outs/filtered_feature_bc_matrix/")
osteoblasts <- subset(osteoblasts, subset = nCount_RNA < 20000 & percent.mt < 10)
osteoblasts$src <- "osteoblasts"
osteoblasts$model <- "osteoblasts"
# OS17 Culture
os17_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0016/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os17_cx_raw <- subset(os17_cx_raw, subset = nFeature_RNA > 3500 & nCount_RNA < 50000 & percent.mt <
15)
os17_cx_raw$src <- "Culture"
os17_cx_raw$model <- "OS-17"
## Tibia
os17_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0018xS0028/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os17_tib_raw <- subset(os17_tib_raw, subset = nFeature_RNA > 3000 & nCount_RNA < 60000 & percent.mt <
18)
os17_tib_raw$src <- "Tibia"
os17_tib_raw$model <- "OS-17"
## Lung double check usage throughout rest of code - Emily (fixed issue where coming from
## wrong file)
os17_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home2/gdrobertslab/lab/Counts/S0024xS0029/outs/filtered_feature_bc_matrix",
species_pattern = "^hg19-")
os17_lung_raw <- subset(os17_lung_raw, subset = nFeature_RNA > 1250 & nCount_RNA < 60000 & percent.mt <
25)
os17_lung_raw$src <- "Lung"
os17_lung_raw$model <- "OS-17"
os17_cx <- NormalizeData(os17_cx_raw) %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData() %>%
RunPCA(pc.genes = os17_cx@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 117232
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8912
## Number of communities: 8
## Elapsed time: 0 seconds
DimPlot(os17_cx, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + scale_color_npg(alpha = 0.7)
# Regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
os17_cx <- CellCycleScoring(object = os17_cx, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
os17_cx <- ScaleData(object = os17_cx, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(os17_cx),
block.size = 10000)
os17_cx <- RunPCA(os17_cx, pc.genes = os17_cx@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 119731
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8908
## Number of communities: 5
## Elapsed time: 0 seconds
# Find optimal clustering resolution
os17_cx_nc <- nRes(os17_cx, res = seq(from = 0.1, to = 0.2, by = 0.05))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 119731
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9457
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 119731
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9302
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 119731
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9152
## Number of communities: 5
## Elapsed time: 0 seconds
plot <- pSil(os17_cx_nc, 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 119731
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9302
## Number of communities: 4
## Elapsed time: 0 seconds
## NULL
plot
## NULL
# With res = 0.15, cells in cluster 3 do not have any significantly different genes that are
# deferentially regulated Therefore, we decided to proceed with res = 0.1
os17_cx <- FindClusters(os17_cx, resolution = 0.1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 119731
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9457
## Number of communities: 3
## Elapsed time: 0 seconds
if (!file.exists("Data")) {
dir.create("Data")
}
save(os17_cx, file = "Data/os17_cx_CCR.RData")
# Subset all to 2800 cells in each condition
os17_cx <- subset(os17_cx_raw, cells = sample(Cells(os17_cx_raw), 2800))
os17.tib <- subset(os17_tib_raw, cells = sample(Cells(os17_tib_raw), 2800))
os17.lung <- subset(os17_lung_raw, cells = sample(Cells(os17_lung_raw), 2800))
# Merge into a single Seurat object
os17 <- merge(os17_cx, y = c(os17.tib, os17.lung), add.cell_ids = c("Culture", "Tibia", "Lung"),
project = "LineageTracing")
# Process and cluster
os17 <- NormalizeData(os17) %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData() %>%
RunPCA(pc.genes = os17@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8400
## Number of edges: 289959
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9048
## Number of communities: 8
## Elapsed time: 0 seconds
# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
os17 <- CellCycleScoring(object = os17, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
os17 <- ScaleData(object = os17, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(os17),
block.size = 10000)
os17 <- RunPCA(os17, pc.genes = os17@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8400
## Number of edges: 280874
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8977
## Number of communities: 7
## Elapsed time: 0 seconds
DimPlot(os17, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS17 Clusters") +
scale_color_npg(alpha = 0.7)
# Plot the data
set.seed(100)
cell_ids <- sample(colnames(os17))
DimPlot(os17, reduction = "umap", group.by = "src", pt.size = 1, label = F, order = cell_ids) +
coord_fixed() + ggtitle("OS17 by Source") + scale_color_npg()
save(os17, file = "Data/os17.RData")
# t143b Culture
t143b_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0017/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
t143b_cx_raw <- subset(t143b_cx_raw, subset = nFeature_RNA > 4000 & nCount_RNA < 74000 & percent.mt <
17)
t143b_cx_raw$src <- "Culture"
t143b_cx_raw$model <- "143B"
## Tibia
t143b_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0052/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
t143b_tib_raw <- subset(t143b_tib_raw, subset = nFeature_RNA > 1000 & nCount_RNA < 30000 & percent.mt <
22)
t143b_tib_raw$src <- "Tibia"
t143b_tib_raw$model <- "143B"
# Lung
t143b_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0019-143B-lung/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
t143b_lung_raw <- subset(t143b_lung_raw, subset = nFeature_RNA > 1000 & nCount_RNA < 30000 & percent.mt <
22)
t143b_lung_raw$src <- "Lung"
t143b_lung_raw$model <- "143B"
# NCHOS2 Flank
os2_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0076/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os2_cx_raw <- subset(os2_cx_raw, subset = nCount_RNA < 36000 & percent.mt < 50)
os2_cx_raw$src <- "Flank"
os2_cx_raw$model <- "NCH-OS-2"
## Tibia
os2_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0042/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os2_tib_raw <- subset(os2_tib_raw, subset = nFeature_RNA > 2500 & nCount_RNA < 40000 & percent.mt <
20)
os2_tib_raw$src <- "Tibia"
os2_tib_raw$model <- "NCH-OS-2"
## Lung
os2_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0041/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os2_lung_raw <- subset(os2_lung_raw, subset = nFeature_RNA > 2500 & nCount_RNA < 60000 & percent.mt <
30)
os2_lung_raw$src <- "Lung"
os2_lung_raw$model <- "NCH-OS-2"
# NCHOS7 Flank
os7_cx_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0043/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os7_cx_raw <- subset(os7_cx_raw, subset = nCount_RNA < 50000 & percent.mt < 25)
os7_cx_raw$src <- "Flank"
os7_cx_raw$model <- "NCH-OS-7"
## Tibia
os7_tib_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0034/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os7_tib_raw <- subset(os7_tib_raw, subset = nFeature_RNA > 2000 & nCount_RNA < 35000 & percent.mt <
14)
os7_tib_raw$src <- "Tibia"
os7_tib_raw$model <- "NCH-OS-7"
## Lung
os7_lung_raw <- tenx_load_qc(path_10x = "/gpfs0/home/gdrobertslab/lab/Counts/S0055/outs/filtered_feature_bc_matrix/",
species_pattern = "^hg19-")
os7_lung_raw <- subset(os7_lung_raw, subset = nCount_RNA < 42000 & percent.mt < 20)
os7_lung_raw$src <- "Lung"
os7_lung_raw$model <- "NCH-OS-7"
os7_cx <- NormalizeData(os7_cx_raw) %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData() %>%
RunPCA(pc.genes = os7_cx@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 68728
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8807
## Number of communities: 6
## Elapsed time: 0 seconds
DimPlot(os7_cx, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + scale_color_npg(alpha = 0.7)
# Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
os7_cx <- CellCycleScoring(object = os7_cx, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
os7_cx <- ScaleData(object = os7_cx, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(os7_cx),
block.size = 10000)
os7_cx <- RunPCA(os7_cx, pc.genes = os7_cx@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8676
## Number of communities: 5
## Elapsed time: 0 seconds
# Find optimal clustering resolution
os7_cx_nc <- nRes(os7_cx, res = seq(from = 0.1, to = 0.15, by = 0.01))
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9248
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9195
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9152
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9123
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9095
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9067
## Number of communities: 4
## Elapsed time: 0 seconds
plot <- pSil(os7_cx_nc, 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9067
## Number of communities: 4
## Elapsed time: 0 seconds
## NULL
plot
## NULL
os7_cx <- FindClusters(os7_cx, resolution = 0.15)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 69766
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9067
## Number of communities: 4
## Elapsed time: 0 seconds
save(os7_cx, file = "Data/os7_cx_CCR.RData")
# Save raw objects
# Make lists for easy loop saving
sample_list <- c(osteoblasts, os17_cx_raw, os17_tib_raw, os17_lung_raw, t143b_cx_raw, t143b_tib_raw,
t143b_lung_raw, os2_cx_raw, os2_tib_raw, os2_lung_raw, os7_cx_raw, os7_tib_raw, os7_lung_raw)
sample_names <- c("osteoblasts", "os17_cx_raw", "os17_tib_raw", "os17_lung_raw", "t143b_cx_raw",
"t143b_tib_raw", "t143b_lung_raw", "os2_cx_raw", "os2_tib_raw", "os2_lung_raw", "os7_cx_raw",
"os7_tib_raw", "os7_lung_raw")
# Correlate sample names and sample labels
names(sample_list) <- sample_names
# Save each raw object
for (item in sample_names) {
# Save each sample as an individual Seurat object with proper name
assign(item, sample_list[[item]])
save(list = item, file = paste("Data/", item, ".RData", sep = ""))
}
Merge objects, process, and cell cycle regress.
# Merge into a single Seurat object
OS <- merge(osteoblasts, y = c(os17_cx_raw, os17_tib_raw, os17_lung_raw, t143b_cx_raw, t143b_tib_raw,
t143b_lung_raw, os2_cx_raw, os2_tib_raw, os2_lung_raw, os7_cx_raw, os7_tib_raw, os7_lung_raw),
add.cell_ids = c("Osteoblasts", "OS-17_Culture", "OS-17_Tibia", "OS-17_Lung", "143B_Culture",
"143B_Tibia", "143B_Lung", "NCH-OS-2_Flank", "NCH-OS-2_Tibia", "NCH-OS-2_Lung", "NCH-OS-7_Flank",
"NCH-OS-7_Tibia", "NCH-OS-7_Lung"), project = "Heterogeneity")
# Process and cluster
OS <- NormalizeData(OS) %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData() %>%
RunPCA(pc.genes = os.17@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 54300
## Number of edges: 1853982
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9614
## Number of communities: 10
## Elapsed time: 15 seconds
# rm(osteoblasts, os17_cx_raw, os17_tib_raw, os17_lung_raw, t143b_cx_raw, t143b_tib_raw,
# t143b_lung_raw, os2_cx_raw, os2_tib_raw, os2_lung_raw, os7_cx_raw, os7_tib_raw,
# os7_lung_raw)
# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
OS <- CellCycleScoring(object = OS, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
OS <- ScaleData(object = OS, vars.to.regress = c("S.Score", "G2M.Score"), features = VariableFeatures(OS),
block.size = 10000)
OS <- RunPCA(OS, pc.genes = OS@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 54300
## Number of edges: 1824736
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9610
## Number of communities: 9
## Elapsed time: 14 seconds
save(OS, file = "Data/OS_merged_postCCR.RData")
OS$model <- as.factor(OS$model)
OS$model <- factor(as.factor(OS$model), levels = c("OS-17", "143B", "NCH-OS-2", "NCH-OS-7", "Osteoblasts"))
# Plot the data
DimPlot(OS, reduction = "umap", group.by = "model", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS by Model") +
scale_color_npg(alpha = 1)
DimPlot(OS, reduction = "umap", group.by = "src", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS by Source")
DimPlot(OS, reduction = "umap", pt.size = 1, label = T) + coord_fixed() + ggtitle("OS by Clusters") +
scale_color_npg(alpha = 0.7)
List objects, process, and cell cycle regress. Used in Figure 3B - ITH code.
# Create List to compute ITH scores (subset to 1500 cells for each)
OS.list_1500 <- list(osteoblasts = osteoblasts, OS17.cx = os17_cx_raw, OS17.Tibia = os17_tib_raw,
OS17.Lung = os17_lung_raw, t143B.cx = t143b_cx_raw, t143B.Tibia = t143b_tib_raw, t143B.Lung = t143b_lung_raw,
OS2.Flank = os2_cx_raw, OS2.Tibia = os2_tib_raw, OS2.Lung = os2_lung_raw, OS7.Flank = os7_cx_raw,
OS7.Tibia = os7_tib_raw, OS7.Lung = os7_lung_raw)
for (i in 1:length(OS.list_1500)) {
OS.list_1500[[i]] <- NormalizeData(OS.list_1500[[i]]) %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData() %>%
RunPCA(pc.genes = OS.list_1500[[i]]@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3) %>%
subset(cells = sample(Cells(OS.list_1500[[i]]), 1500))
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5735
## Number of edges: 201174
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8642
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3178
## Number of edges: 117232
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8912
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2849
## Number of edges: 103732
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8568
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4574
## Number of edges: 156586
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8373
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3363
## Number of edges: 123143
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8215
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6763
## Number of edges: 226939
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8477
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5134
## Number of edges: 181392
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8631
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 6749
## Number of edges: 222345
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8705
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4285
## Number of edges: 148107
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8520
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1648
## Number of edges: 57506
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8356
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1998
## Number of edges: 68728
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8807
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2891
## Number of edges: 102969
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8567
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5133
## Number of edges: 177023
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8680
## Number of communities: 9
## Elapsed time: 0 seconds
# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
for (i in 1:length(OS.list_1500)) {
OS.list_1500[[i]] <- CellCycleScoring(object = OS.list_1500[[i]], s.features = s.genes, g2m.features = g2m.genes,
set.ident = TRUE)
OS.list_1500[[i]] <- ScaleData(object = OS.list_1500[[i]], vars.to.regress = c("S.Score", "G2M.Score"),
features = VariableFeatures(OS.list_1500[[i]]), block.size = 10000) # Change to VariableFeatures()
OS.list_1500[[i]] <- RunPCA(OS.list_1500[[i]], pc.genes = OS.list_1500[[i]]@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 61069
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8390
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 61975
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8650
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 56704
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8091
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 56977
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7771
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 58610
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7691
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 60734
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7791
## Number of communities: 4
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 56073
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7987
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 52995
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8059
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 52729
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7782
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 51652
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7883
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 51919
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8649
## Number of communities: 5
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 55164
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8026
## Number of communities: 3
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1500
## Number of edges: 56741
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8093
## Number of communities: 4
## Elapsed time: 0 seconds
save(OS.list_1500, file = "Data/OSlist_1500_CCR.RData")
List objects, process, and cell cycle regress. Used in Figure 3 C-D code.
# Create list for usage in Figure 3
tibia_list <- list(OS17.Tibia = os17_tib_raw, t143B.Tibia = t143b_tib_raw, OS2.Tibia = os2_tib_raw,
OS7.Tibia = os7_tib_raw)
lung_list <- list(OS17.Lung = os17_lung_raw, t143B.Lung = t143b_lung_raw, OS2.Lung = os2_lung_raw,
OS7.Lung = os7_lung_raw)
OS_list <- list()
for (i in 1:length(tibia_list)) {
message(i, head(paste(tibia_list[[i]]$model, "_TL", sep = ""), 1))
# Subset number of cells in each Seurat object to the least in the group
a <- table(tibia_list[[i]]$orig.ident)
b <- table(lung_list[[i]]$orig.ident)
list <- c(a, b)
min <- min(list)
tibia_list[[i]] <- subset(tibia_list[[i]], cells = sample(Cells(tibia_list[[i]]), min))
lung_list[[i]] <- subset(lung_list[[i]], cells = sample(Cells(lung_list[[i]]), min))
title <- head(paste(tibia_list[[i]]$model, "_TL", sep = ""), 1)
title <- gsub("-", "", title)
OS_list[[title]] <- merge(tibia_list[[i]], y = c(lung_list[[i]]), add.cell_ids = c(head(paste(tibia_list[[i]]$model,
"_", tibia_list[[i]]$src, sep = ""), 1), head(paste(lung_list[[i]]$model, "_", lung_list[[i]]$src,
sep = ""), 1)), project = "Heterogeneity")
}
# CCR Attempt to regress out the effects of cell cycle on these tumor cells
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
for (i in 1:length(OS_list)) {
OS_list[[i]] <- NormalizeData(OS_list[[i]]) %>%
FindVariableFeatures(selection.method = "vst") %>%
ScaleData() %>%
RunPCA(pc.genes = OS_list[[i]]@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
OS_list[[i]] <- CellCycleScoring(object = OS_list[[i]], s.features = s.genes, g2m.features = g2m.genes,
set.ident = TRUE)
OS_list[[i]] <- ScaleData(object = OS_list[[i]], vars.to.regress = c("S.Score", "G2M.Score"),
features = VariableFeatures(OS_list[[i]]), block.size = 10000) # Change to VariableFeatures()
OS_list[[i]] <- RunPCA(OS_list[[i]], pc.genes = OS_list[[i]]@var.genes, npcs = 20) %>%
RunUMAP(reduction = "pca", dims = 1:20) %>%
FindNeighbors(reduction = "pca", dims = 1:20) %>%
FindClusters(resolution = 0.3)
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5698
## Number of edges: 195555
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8596
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5698
## Number of edges: 190346
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8444
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10268
## Number of edges: 352757
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8850
## Number of communities: 7
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10268
## Number of edges: 343259
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8657
## Number of communities: 7
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3296
## Number of edges: 114815
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8568
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3296
## Number of edges: 110588
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8283
## Number of communities: 8
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5782
## Number of edges: 203134
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8971
## Number of communities: 7
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5782
## Number of edges: 201063
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8855
## Number of communities: 5
## Elapsed time: 0 seconds
save(OS_list, file = "Data/OS_list_CCR.RData")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux 8.4 (Ootpa)
##
## Matrix products: default
## BLAS: /gpfs0/export/apps/opt/R/4.1.0-foss-2020a/lib64/R/lib/libRblas.so
## LAPACK: /gpfs0/export/apps/opt/R/4.1.0-foss-2020a/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] cluster_2.1.4 msigdbr_7.5.1 rrrSingleCellUtils_0.5.0
## [4] nichenetr_1.1.1 RColorBrewer_1.1-3 clusterProfiler_4.2.2
## [7] ggplot2_3.4.0 reshape2_1.4.4 SeuratObject_4.1.3
## [10] Seurat_4.3.0 reticulate_1.26 Matrix_1.5-3
## [13] dplyr_1.0.10 cowplot_1.1.1 ggsci_2.9
##
## loaded via a namespace (and not attached):
## [1] scattermore_0.8 ModelMetrics_1.2.2.2 R.methodsS3_1.8.2
## [4] tidyr_1.2.1 bit64_4.0.5 knitr_1.41
## [7] R.utils_2.12.2 irlba_2.3.5.1 data.table_1.14.6
## [10] rpart_4.1.19 KEGGREST_1.34.0 hardhat_1.2.0
## [13] RCurl_1.98-1.6 doParallel_1.0.17 generics_0.1.3
## [16] BiocGenerics_0.40.0 RSQLite_2.2.19 shadowtext_0.1.2
## [19] RANN_2.6.1 proxy_0.4-27 future_1.29.0
## [22] DiagrammeR_1.0.9 bit_4.0.5 tzdb_0.3.0
## [25] enrichplot_1.14.2 spatstat.data_3.0-0 lubridate_1.9.0
## [28] httpuv_1.6.6 assertthat_0.2.1 viridis_0.6.2
## [31] gower_1.0.0 xfun_0.35 hms_1.1.2
## [34] jquerylib_0.1.4 babelgene_22.9 evaluate_0.18
## [37] promises_1.2.0.1 fansi_1.0.3 caTools_1.18.2
## [40] igraph_1.3.1 DBI_1.1.3 htmlwidgets_1.5.4
## [43] spatstat.geom_3.0-3 stats4_4.1.0 purrr_0.3.5
## [46] ellipsis_0.3.2 backports_1.4.1 deldir_1.0-6
## [49] vctrs_0.5.1 Biobase_2.54.0 ROCR_1.0-11
## [52] abind_1.4-5 caret_6.0-93 cachem_1.0.6
## [55] withr_2.5.0 ggforce_0.4.1 progressr_0.11.0
## [58] checkmate_2.1.0 sctransform_0.3.5 treeio_1.18.1
## [61] fdrtool_1.2.17 goftest_1.2-3 DOSE_3.20.1
## [64] ape_5.6-2 lazyeval_0.2.2 crayon_1.5.2
## [67] spatstat.explore_3.0-5 labeling_0.4.2 recipes_1.0.3
## [70] pkgconfig_2.0.3 tweenr_2.0.2 GenomeInfoDb_1.30.1
## [73] nlme_3.1-160 nnet_7.3-18 rlang_1.0.6
## [76] globals_0.16.2 lifecycle_1.0.3 miniUI_0.1.1.1
## [79] downloader_0.4 randomForest_4.7-1.1 polyclip_1.10-4
## [82] matrixStats_0.63.0 lmtest_0.9-40 aplot_0.1.9
## [85] zoo_1.8-11 base64enc_0.1-3 ggridges_0.5.4
## [88] GlobalOptions_0.1.2 png_0.1-8 viridisLite_0.4.1
## [91] rjson_0.2.21 bitops_1.0-7 R.oo_1.25.0
## [94] KernSmooth_2.23-20 visNetwork_2.1.2 pROC_1.18.0
## [97] Biostrings_2.62.0 blob_1.2.3 shape_1.4.6
## [100] stringr_1.5.0 qvalue_2.26.0 parallelly_1.32.1
## [103] spatstat.random_3.0-1 jpeg_0.1-9 readr_2.1.3
## [106] gridGraphics_0.5-1 S4Vectors_0.32.4 scales_1.2.1
## [109] memoise_2.0.1 magrittr_2.0.3 plyr_1.8.8
## [112] ica_1.0-3 zlibbioc_1.40.0 compiler_4.1.0
## [115] scatterpie_0.1.8 clue_0.3-63 fitdistrplus_1.1-8
## [118] cli_3.4.1 XVector_0.34.0 listenv_0.8.0
## [121] patchwork_1.1.2 pbapply_1.6-0 htmlTable_2.4.1
## [124] formatR_1.12 Formula_1.2-4 MASS_7.3-58.1
## [127] tidyselect_1.2.0 stringi_1.7.8 highr_0.9
## [130] yaml_2.3.6 GOSemSim_2.20.0 latticeExtra_0.6-30
## [133] ggrepel_0.9.2 grid_4.1.0 sass_0.4.4
## [136] fastmatch_1.1-3 tools_4.1.0 timechange_0.1.1
## [139] future.apply_1.10.0 parallel_4.1.0 rstudioapi_0.14
## [142] circlize_0.4.15 foreign_0.8-83 foreach_1.5.2
## [145] gridExtra_2.3 prodlim_2019.11.13 farver_2.1.1
## [148] Rtsne_0.16 ggraph_2.1.0 digest_0.6.30
## [151] shiny_1.7.3 lava_1.7.0 Rcpp_1.0.9
## [154] later_1.3.0 RcppAnnoy_0.0.20 httr_1.4.4
## [157] AnnotationDbi_1.56.2 ComplexHeatmap_2.10.0 colorspace_2.0-3
## [160] tensor_1.5 IRanges_2.28.0 splines_4.1.0
## [163] uwot_0.1.14 yulab.utils_0.0.5 tidytree_0.4.1
## [166] spatstat.utils_3.0-1 graphlayouts_0.8.4 sp_1.5-1
## [169] ggplotify_0.1.0 plotly_4.10.1 xtable_1.8-4
## [172] jsonlite_1.8.4 ggtree_3.2.1 tidygraph_1.2.2
## [175] timeDate_4021.106 ggfun_0.0.9 ipred_0.9-13
## [178] R6_2.5.1 Hmisc_4.7-2 pillar_1.8.1
## [181] htmltools_0.5.4 mime_0.12 glue_1.6.2
## [184] fastmap_1.1.0 BiocParallel_1.28.3 class_7.3-20
## [187] codetools_0.2-18 fgsea_1.20.0 utf8_1.2.2
## [190] lattice_0.20-45 bslib_0.4.1 spatstat.sparse_3.0-0
## [193] tibble_3.1.8 leiden_0.4.3 interp_1.1-3
## [196] GO.db_3.14.0 limma_3.50.3 survival_3.4-0
## [199] rmarkdown_2.18 munsell_0.5.0 e1071_1.7-12
## [202] DO.db_2.9 GetoptLong_1.0.5 GenomeInfoDbData_1.2.7
## [205] iterators_1.0.14 gtable_0.3.1